#############################################################################
# Data descriptives
#
#############################################################################

rm(list=ls())
graphics.off()

library(data.table)
library(ggplot2)
library(haven)
library(xtable)
library("RColorBrewer")

save_dta = "Data_new"
save_res = "Results"
memory.limit(size=200000)
time0=Sys.time()

#############################################################################
# 1. Data and weather variables
#############################################################################

dt=data.table(read_dta("Data_new/Data_long.dta"))
table(dt$year)

# Save a copy of the original data 
dt0=copy(dt)

# Rename yield
setnames(dt,"cornyield","yield")
summary(dt[,.(yield)])

# Count number of fips per state
dt[,ones:=1]
dt[,nobs:=cumsum(ones),by=fips]
dt[,nc:=sum(ones*(nobs==1)),by=state]
summary(dt$nc)
table(dt[nobs==1,nc])

# Rescaling dd and prec and generating squares
setnames(dt,"dday0C","dd")
dt[,ddr:=dd/183]
dt[,ddr_sq:=ddr^2]
dt[,precr:=prec/183]
dt[,precr_sq:=precr^2]
summary(dt[,.(dd,ddr,ddr_sq,prec,precr,precr_sq)])

# Check dd variation per county
dt[,ddr_sd:=sqrt(var(x=ddr)),by=fips]
dt[,ddr_mean:=mean(x=ddr),by=fips]
summary(dt[,.(ddr_mean,ddr_sd)])

# Generate list of counties: 2,253 counties and 107,004 observations
county_list = unique(dt$fips)
length(county_list)
nrow(dt)

# Keep subset of variables
dt = dt[,.SD,.SDcols=c("state","fips","year","yield","corn_area","ddr","ddr_sq","ddr_mean","ddr_sd","precr","precr_sq","nfips","nf","nobs")]
summary(dt)

# Save
time1=Sys.time()
save.image(paste0(save_dta,"/Data_baseline.RData"))


#############################################################################
# 2. Descriptive plots of the data (unweighted) over time
#############################################################################

rm(list=ls())
load("Data_new/Data_baseline.RData")

##################################
### Plot averages over time 
##################################

dt[,corn_areaK:=corn_area/1000]
dt_plot=dt[,lapply(.SD,mean),by=year,.SDcols=c("precr","ddr","corn_areaK","yield")]
nrow(dt_plot)

# ddr
g=ggplot(dt_plot,aes(x=year)) + theme_bw() +
  theme(title=element_text(size=14),plot.title=element_text(hjust=0.5),
        plot.subtitle=element_text(hjust=0.5), 
        axis.text = element_text(size=18),axis.title = element_text(size=18),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.position = 'none') +
  xlab("Year") 
g+geom_line(aes(y=ddr)) + ylab("Temperature (C)")
ggsave(paste0(save_res,"/Mean_ddr_over_time.pdf"))

# yields
g+geom_line(aes(y=yield)) + ylab("Cornyield (bu/acre)")
ggsave(paste0(save_res,"/Mean_yield_over_time.pdf"))

##################################
### Plot detrended over time 
##################################

# Detrend and then average
dt_aux = copy(dt)
b=lm(dt_aux,formula="yield~year")
dt_aux[,yield_trend:=b$fitted.values]
dt_aux[,corny_det:=yield-yield_trend]
dt_plot=dt_aux[,lapply(.SD,mean),by=year,.SDcols=c("precr","ddr","corn_area","corny_det")]
nrow(dt_plot)
ddr_avg=mean(dt_plot$ddr)

g= ggplot(dt_plot,aes(x=year,y=corny_det))+geom_line() + theme_bw() +
  theme(title=element_text(size=14),plot.title=element_text(hjust=0.5),
        plot.subtitle=element_text(hjust=0.5), 
        axis.text = element_text(size=18),axis.title = element_text(size=18),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.position = 'none')  +
  xlab("Year")
dt_plot[,dd_new:=(ddr-ddr_avg)*10]
g + geom_line(aes(y=dd_new),linetype="dashed") +
  scale_y_continuous("Yield (de-trended) (bu/acre)", 
                     sec.axis = sec_axis(~ . /10 +ddr_avg, name = "Temperature (C)"))
ggsave(paste0(save_res,"/Desc_yield_dt_vs_dd.pdf"),dpi=300)

#############################################################################
# 3. Maps of (unweighted) averages 
#############################################################################

# Bring US map
load("Data_ori/Data_us_map.RData")
source("Code/0. CODE Auxiliary.R")
save_res = "Results"

#####################################
# Map of average temperature 
#####################################

dt_plot=dt[,lapply(.SD,mean),by=fips,.SDcols=c("precr","ddr","corn_areaK","yield")]
nrow(dt_plot)
summary(dt_plot$ddr)
qq= quantile(dt_plot$ddr,probs=seq(0.05,0.95,0.05))
qq
bmin= qq[2]
bmax= qq[18]
plot_map(data=dt_plot[,.(fips,ddr)],trunc=1,bmin=bmin,bmax=bmax,llabel="Temperature (C)   ",save_name="Map_counties_ddr",palette="OrRd",rev=0,nc=9)

#####################################
# Map of average yields
#####################################

summary(dt_plot$yield)
qq= quantile(dt_plot$yield,probs=seq(0.05,0.95,0.05))
qq
bmin= qq[2]
bmax= qq[18]
plot_map(data=dt_plot[,.(fips,yield)],trunc=1,bmin=bmin,bmax=bmax,llabel="Yields (bu/acre)   ",save_name="Map_counties_yield",palette="OrRd",rev=0,nc=9)


#############################################################################
# Within and between variance of temperature
#############################################################################

# SST = SSE + SSR 
dt[,ddr_mean:=lapply(.SD,mean),by="fips",.SDcols="ddr"]
dt[,err:=ddr-ddr_mean]
summary(dt[,.(ddr,ddr_mean,err)])
var(dt$ddr)
var(dt$ddr_mean)
var(dt$err)
# Check
var(dt$ddr) - var(dt$ddr_mean) -var(dt$err)
# SSE / SST: between
var(dt$ddr_mean) / var(dt$ddr)  
# SSE / SST: within
var(dt$err) / var(dt$ddr) 


